# Clean up: remove every object from the current environment so the script
# starts from an empty workspace. Note this does not detach loaded packages.
rm(list = ls())

# Set working directory; please set your own here.
# All relative paths used below ("Results/", "Plots/", "Drafts/") are resolved
# against this directory.
setwd("~/Dropbox/Fragmentation/fragmentation_replication_bjps/")

library(tidyverse)
library(foreign)

# Import data with main analyses
data <- read.dta("Results/analyses_final.dta")

# Relabel selected values of a column, whether it is stored as a factor or as
# a character vector.
#   x       : factor or character vector to clean
#   mapping : named character vector; names are the labels to look for, values
#             are their replacements
# Returns `x` with matching labels replaced. Labels not listed in `mapping`
# (and NAs) are left untouched. For factors the levels themselves are renamed,
# because assigning an unknown string into a factor would yield NA.
relabel_values <- function(x, mapping) {
  if (is.factor(x)) {
    lv <- levels(x)
    hit <- lv %in% names(mapping)
    lv[hit] <- unname(mapping[lv[hit]])
    levels(x) <- lv
  } else {
    hit <- x %in% names(mapping)
    x[hit] <- unname(mapping[x[hit]])
  }
  x
}

# Defining line breaks in the type of outcome.
# The labels carry a literal backslash followed by "n". As a regular
# expression "\\n" matches a real newline, not those two characters, so the
# match has to be a fixed-string one. Only relevant when the column is a
# factor; character columns are handled by the explicit recodes below.
if (is.factor(data$outcome_type)) {
  levels(data$outcome_type) <- gsub(
    "\\n", "\n", levels(data$outcome_type),
    fixed = TRUE
  )
}

# Fixing levels in outcome and outcome_type variables
# Outcome: the literal "\n" is replaced by a space (single-line facet labels)
table(data$Outcome)
data$Outcome <- relabel_values(
  data$Outcome,
  c(
    "Summary\\nmeasures (PCA)" = "Summary measures (PCA)",
    "Repr underpriviliged\\ngroups (PCA)" = "Repr underpriviliged groups (PCA)"
  )
)

# outcome_type: the literal "\n" is replaced by a real line break
table(data$outcome_type)
data$outcome_type <- relabel_values(
  data$outcome_type,
  c(
    "Summary\\nmeasures" = "Summary\nmeasures",
    "Descriptive\\nrepr of women" = "Descriptive\nrepr of women",
    "Repr underpriviliged\\ngroups" = "Repr underpriviliged\ngroups",
    "Other\\noutcomes" = "Other\noutcomes"
  )
)

# Subset the results that matter: whole sample, no fixed effects, the
# "No. parties just above" specification, with ENPP as the predictor
bandwidth_change <- data %>%
  filter(
    Sample == "Whole",
    FE == "None",
    id == "No. parties just above",
    Predictor == "ENPP"
  )

# Main results: effect estimate and its confidence bounds against the
# bandwidth, one panel per outcome (panels ordered by `pca`, descending)
bandwidth_change_plot <- bandwidth_change %>%
  mutate(Outcome = fct_reorder(Outcome, pca, .desc = TRUE)) %>%
  ggplot(aes(x = Bandwidth, y = Effect)) +
  # Point estimate (x and y are inherited from the base aesthetics)
  geom_line(size = 1, color = "blue") +
  # Upper and lower confidence bounds
  geom_line(aes(y = ci_upper), linetype = "dashed", color = "black") +
  geom_line(aes(y = ci_lower), linetype = "dashed", color = "black") +
  # Reference lines: zero effect, and the bandwidth value of 50
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  geom_vline(
    xintercept = 50,
    linetype = "dashed",
    color = "green4",
    size = .7
  ) +
  # Limits are set on the scale itself, so values outside [-1, 1] are not
  # drawn rather than merely cropped from view
  scale_y_continuous(
    name = "Effect of one extra party in parliament (standardized)",
    limits = c(-1, 1)
  ) +
  labs(
    x = "Upper boundary of the bandwidth (percentage above the threshold)",
    title = "2SLS estimates"
  ) +
  facet_wrap(~Outcome) +
  theme_bw() +
  theme(plot.title = element_text(face = "bold", size = 13))
bandwidth_change_plot

# Write the same figure to both output folders
ggsave(
  "Plots/bandwidth_change_plot.png",
  plot = bandwidth_change_plot,
  width = 25, height = 20, units = "cm"
)
ggsave(
  "Drafts/bandwidth_change_plot.png",
  plot = bandwidth_change_plot,
  width = 25, height = 20, units = "cm"
)

# First stage: the F-statistic against the bandwidth, using the same subset
# and the same panel ordering as the main-results plot
bw_change_firststage_plot <- bandwidth_change %>%
  mutate(Outcome = fct_reorder(Outcome, pca, .desc = TRUE)) %>%
  ggplot(aes(x = Bandwidth, y = fstat)) +
  # F-statistic (x and y are inherited from the base aesthetics)
  geom_line(size = 1, color = "blue") +
  # NOTE(review): 1.96 is the usual critical value for a z/t statistic, while
  # this axis shows an F-statistic -- confirm the intended reference level
  geom_hline(yintercept = 1.96, linetype = "dashed", color = "red") +
  # Same bandwidth marker (50) as in the main-results plot
  geom_vline(
    xintercept = 50,
    linetype = "dashed",
    color = "green4",
    size = .7
  ) +
  scale_y_continuous(name = "F-statistic in the first stage") +
  labs(
    x = "Upper boundary of the bandwidth (percentage above the threshold)",
    title = "First stage"
  ) +
  facet_wrap(~Outcome) +
  theme_bw() +
  theme(plot.title = element_text(face = "bold", size = 13))
bw_change_firststage_plot

# Write the same figure to both output folders
ggsave(
  "Plots/bandwidth_change_first_plot.png",
  plot = bw_change_firststage_plot,
  width = 25, height = 20, units = "cm"
)
ggsave(
  "Drafts/bandwidth_change_first_plot.png",
  plot = bw_change_firststage_plot,
  width = 25, height = 20, units = "cm"
)

# Making these two plots into a single one: 2SLS estimates on top, first
# stage below.
# grid.arrange() lives in gridExtra, which the library() calls at the top of
# this script do not attach, so it is called through its namespace here
# (gridExtra must be installed). It draws the combined figure on the current
# device and returns the arranged layout object.
bw_change_both <- gridExtra::grid.arrange(
  bandwidth_change_plot,
  bw_change_firststage_plot,
  nrow = 2,
  ncol = 1
)
bw_change_both

# Save the plot
ggsave(
  "Plots/figd2.png",
  plot = bw_change_both,
  width = 32, height = 50, units = "cm"
)